#########
## Replication Data of Reed/Scheiner/Thies
###
setwd("C:\\Users\\jorda\\Documents\\Working Papers\\Candidate vs Party (Paper 2)")

rm(list=ls())

library(ggplot2)
theme_set(theme_bw())
library(foreign)
library(haven)

#####
## First things first, I need to get the data to match between the JHRED data
## and the replication data

jhrdat<-read.csv("C:\\Users\\jorda\\Documents\\Data Sets\\Reed-Smith-JHRED-Data.csv")
jhrdat<-subset(jhrdat,year==1996|year==2000|year==2003|year==2005|year==2009|year==2012|year==2014)

jrrep<-subset(jhrdat,ku_voteshare>0)

repd<-read_dta("C:\\Users\\jorda\\Documents\\Data Sets\\2012 Reed & Scheiner Rep Data\\Quality vs. Party Data Set 2.10.11.dta")
repd<-subset(repd,smdvote>0)
  
### Need to make a coordination failure variable

table(jrrep$party_en)

### Coordination Failure

# For left block, party_id is 5, 12, 16.5,36
#For right block, party_id is 1.5, 3, 31, 33

jrrep$lfbloc<-ifelse(jrrep$party_id==5|jrrep$party_id==12|jrrep$party_id==16.5|jrrep$party_id==36,1,0)
jrrep$rhtbloc<-ifelse(jrrep$party_id==1.5|jrrep$party_id==3|jrrep$party_id==31|jrrep$party_id==33,1,0)

lbloc<-aggregate(jrrep$lfbloc,by=list(jrrep$year,jrrep$kucode),sum)
colnames(lbloc)<-c("year","kucode","lbloc")

rbloc<-aggregate(jrrep$rhtbloc,by=list(jrrep$year,jrrep$kucode),sum)
colnames(rbloc)<-c("year","kucode","rbloc")

jrrep<-merge(jrrep,lbloc,by=c("year","kucode"))
jrrep<-merge(jrrep,rbloc,by=c("year","kucode"))

jrrep$cfail_l<-ifelse(jrrep$party_id==16,jrrep$lbloc-jrrep$rbloc,0)
jrrep$cfail_r<-ifelse(jrrep$party_id==1,jrrep$rbloc-jrrep$lbloc,0)
jrrep$coord_fail<-jrrep$cfail_l+jrrep$cfail_r


######
## The next step is to subset the datasets to just LDP and DPJ candidates

jrdat<-subset(jrrep,party_id==1|party_id==16)

rsd<-subset(repd,pty==1|pty==2)

#####
## Use the returnee variable from the rep data to create a new candidate
## variable

returnee<-data.frame(rsd$year,rsd$ku,rsd$pty,rsd$returnee)
names(returnee)<-c("year","kucode","party_id","returnee")
returnee[returnee$party_id==2,"party_id"]<-16

jrdat<-merge(jrdat,returnee,by=c("year","kucode","party_id"),all=T)

jrdat$newcand<-ifelse(jrdat$inc==0 & jrdat$returnee==0,1,0)

table(jrdat$newcand,jrdat$year)
table(rsd$newcand,rsd$year)

# Potential Alternative Measure

jrdat$returnee2<-ifelse(jrdat$inc==0 & jrdat$totcwins>1,1,0)
jrdat$newcand2<-ifelse(jrdat$inc==0 & jrdat$returnee2==0,1,0)


#####
## Now I need to recreate the key variables from the model in the paper

## DV - Win/Lose

jrdat$win<-ifelse(jrdat$result==1,1,0)

### Candidate Quality

jrdat$cqual<-ifelse(jrdat$bcrat==1|jrdat$hc==1|jrdat$assy==1|
                       jrdat$gov==1|jrdat$mayor==1,1,0)


## Potential issue with the assy dummy variable. It includes local political office
## instead of prefectural assembly membership. It is a looser conception of quality than
## the JJS


### LDP Dummy

jrdat$ldpdum<-ifelse(jrdat$party_id==1,1,0)

### Expenditure in 100,000s of Yen

jrdat$exp100<-jrdat$exp/100000


## Candidate has run before

jrdat$cand96a<-ifelse(jrdat$year==1996,jrdat$pid,0)
cand96<-aggregate(jrdat$cand96a,by=list(jrdat$kucode,jrdat$party_id),sum)
names(cand96)<-c("kucode","party_id","cand96")

jrdat$cand00a<-ifelse(jrdat$year==2000,jrdat$pid,0)
cand00<-aggregate(jrdat$cand00a,by=list(jrdat$kucode,jrdat$party_id),sum)
names(cand00)<-c("kucode","party_id","cand00")

jrdat$cand03a<-ifelse(jrdat$year==2003,jrdat$pid,0)
cand03<-aggregate(jrdat$cand03a,by=list(jrdat$kucode,jrdat$party_id),sum)
names(cand03)<-c("kucode","party_id","cand03")

jrdat$cand05a<-ifelse(jrdat$year==2005,jrdat$pid,0)
cand05<-aggregate(jrdat$cand05a,by=list(jrdat$kucode,jrdat$party_id),sum)
names(cand05)<-c("kucode","party_id","cand05")

jrdat$cand09a<-ifelse(jrdat$year==2009,jrdat$pid,0)
cand09<-aggregate(jrdat$cand09a,by=list(jrdat$kucode,jrdat$party_id),sum)
names(cand09)<-c("kucode","party_id","cand09")

jrdat$cand12a<-ifelse(jrdat$year==2012,jrdat$pid,0)
cand12<-aggregate(jrdat$cand12a,by=list(jrdat$kucode,jrdat$party_id),sum)
names(cand12)<-c("kucode","party_id","cand12")

jrdat$cand14a<-ifelse(jrdat$year==2014,jrdat$pid,0)
cand14<-aggregate(jrdat$cand14a,by=list(jrdat$kucode,jrdat$party_id),sum)
names(cand14)<-c("kucode","party_id","cand14")

jrdat<-merge(jrdat,cand96,by=c("kucode","party_id"))
jrdat<-merge(jrdat,cand00,by=c("kucode","party_id"))
jrdat<-merge(jrdat,cand03,by=c("kucode","party_id"))
jrdat<-merge(jrdat,cand05,by=c("kucode","party_id"))
jrdat<-merge(jrdat,cand09,by=c("kucode","party_id"))
jrdat<-merge(jrdat,cand12,by=c("kucode","party_id"))
jrdat<-merge(jrdat,cand14,by=c("kucode","party_id"))

jrdat$samecand<-ifelse(jrdat$cand96==jrdat$cand00 & jrdat$year==2000 |
                         jrdat$cand00==jrdat$cand03 & jrdat$year==2003 |
                         jrdat$cand03==jrdat$cand05 & jrdat$year==2005 |
                         jrdat$cand05==jrdat$cand09 & jrdat$year==2009 |
                         jrdat$cand09==jrdat$cand12 & jrdat$year==2012 |
                         jrdat$cand12==jrdat$cand14 & jrdat$year==2014,1,0)

table(jrdat$samecand,jrdat$year)
table(rsd$samecand,rsd$year)


## Opponent is a LDP/DPJ incumbent

jrdat$ldpinc<-ifelse(jrdat$party_id==1 & jrdat$inc==1,1,0)
jrdat$dpjinc<-ifelse(jrdat$party_id==16 & jrdat$inc==1,1,0)

incpty_ldp<-aggregate(jrdat$ldpinc,by=list(jrdat$year,jrdat$kucode),sum)
names(incpty_ldp)<-c("year","kucode","incpty_ldp")
incpty_dpj<-aggregate(jrdat$dpjinc,by=list(jrdat$year,jrdat$kucode),sum)
names(incpty_dpj)<-c("year","kucode","incpty_dpj")

jrdat<-merge(jrdat,incpty_ldp,by=c("year","kucode"))
jrdat<-merge(jrdat,incpty_dpj,by=c("year","kucode"))

jrdat$oppinc<-ifelse(jrdat$incpty_ldp==1 & jrdat$party_id==16 |
                 jrdat$incpty_dpj==1 & jrdat$party_id==1,1,0)


## Koizumi Kids

jrdat$koikid<-ifelse(jrdat$inc==1 & jrdat$totcruns==2 & jrdat$totcwins==1 & 
                       jrdat$party_id==1 & jrdat$year==2009,1,0)


####
##  Dataset Using Prefectural Assembly rather than city or town
####

cqdat<-read.csv("C:\\Users\\jorda\\Documents\\Data Sets\\Candidate Quality Replication Data.csv")
names(cqdat)[1]<-"year"

qdat<-cqdat[,1:6]
qdat<-qdat[-5]
head(qdat)


jtest<-merge(jrdat,qdat,by=c("year","kucode","party_id","pid"),all = T)
jtest$kengi[is.na(jtest$kengi)]<-0
head(jtest)

jtest$cqual2<-ifelse(jtest$bcrat==1|jtest$hc==1|jtest$kengi==1|
                       jtest$gov==1|jtest$mayor==1,1,0)


#######################
#####    Original Analysis
#######################

r1<-glm(smdwin05 ~ quality2 + ldp + coord_fail + cand_spend + samecand +
          opp_inherits + opp_major_sinc + did, family = binomial(link = "probit"),
        data=subset(rsd,year==2000 & newcand==1 ))
summary(r1)

#checks out

## Replication

rep1<-glm(win ~ cqual + ldpdum + coord_fail + exp100 + samecand +
            oppinc + popdensity, family = binomial(link = "probit"),
          data=subset(jrdat,year==2000 & newcand2==1))
summary(rep1)


## Replication with refined data

rep1<-glm(win ~ cqual2 + ldpdum + coord_fail + exp100 + samecand +
            oppinc + popdensity, family = binomial(link = "probit"),
          data=subset(jtest,year==2000 & newcand2==1))
summary(rep1)

## This replication is near perfect!

### Each of the models by year
#################################################

m00<-glm(win ~ cqual2 + ldpdum + coord_fail + exp100 + samecand +
            oppinc + popdensity, family = binomial(link = "probit"),
          data=subset(jtest,year==2000 & newcand2==1))
summary(m00)

#

m03<-glm(win ~ cqual2 + ldpdum + coord_fail + exp100 + samecand +
             oppinc + popdensity, family = binomial(link = "probit"),
           data=subset(jtest,year==2003 & newcand2==1))
summary(m03)

#

m05<-glm(win ~ cqual2 + ldpdum + coord_fail + exp100 + samecand +
             oppinc + popdensity, family = binomial(link = "probit"),
           data=subset(jtest,year==2005 & newcand2==1))
summary(m05)

#

m09<-glm(win ~ cqual2 + ldpdum + coord_fail + exp100 + samecand +
             oppinc + popdensity, family = binomial(link = "probit"),
           data=subset(jtest,year==2009 & newcand2==1))
summary(m09)

#

m12<-glm(win ~ cqual2 + ldpdum + coord_fail + exp100 + samecand +
             oppinc + popdensity, family = binomial(link = "probit"),
           data=subset(jtest,year==2012 & newcand2==1))
summary(m12)


### Models of Vote Share
#################################################

vm00<-lm(ku_voteshare*100 ~ cqual2 + ldpdum + coord_fail + exp100 + samecand +
           oppinc + popdensity + ku_ncand,
         data=subset(jtest,year==2000 & newcand2==1))
summary(vm00)

#

vm03<-lm(ku_voteshare*100 ~ cqual2 + ldpdum + coord_fail + exp100 + samecand +
           oppinc + popdensity + ku_ncand,
         data=subset(jtest,year==2003 & newcand2==1))
summary(vm03)

#

vm05<-lm(ku_voteshare*100 ~ cqual2 + ldpdum + coord_fail + exp100 + samecand +
           oppinc + popdensity + ku_ncand,
         data=subset(jtest,year==2005 & newcand2==1))
summary(vm05)

#

vm09<-lm(ku_voteshare*100 ~ cqual2 + ldpdum + coord_fail + exp100 + samecand +
           oppinc + popdensity + ku_ncand,
         data=subset(jtest,year==2009 & newcand2==1))
summary(vm09)

#

vm12<-lm(ku_voteshare*100 ~ cqual2 + ldpdum + coord_fail + exp100 + samecand +
           oppinc + popdensity + ku_ncand,
         data=subset(jtest,year==2012 & newcand2==1))
summary(vm12)





################################################################
#############################
###############################################################

#'
#' ## Coefficient Plots
#'
#' Create Data Frames with Coefficient Values
#' 
#' 

#################
######  Plots for Models of Winning a Seat
#############################################
#######

## Let's plot the candidate quality variable and the LDP variables over the 
## election years

(coef1 <- coef(m00)) # coefficient 
(ci1 <- confint(m00, level=0.95)) # 95% confidence interval
cdt1 <- as.data.frame(cbind(coef1, ci1)) # make it a data
colnames(cdt1) <- c("cf","lci","uci") # new names of data
cdt1$name <- "2000 Election" # model name

(coef2 <- coef(m03)) # coefficient 
(ci2 <- confint(m03, level=0.95)) # 95% confidence interval
cdt2 <- as.data.frame(cbind(coef2, ci2)) # make it a data
colnames(cdt2) <- c("cf","lci","uci") # new names of data
cdt2$name <- "2003 Election" # model name

(coef3 <- coef(m05)) # coefficient 
(ci3 <- confint(m05, level=0.95)) # 95% confidence interval
cdt3 <- as.data.frame(cbind(coef3, ci3)) # make it a data
colnames(cdt3) <- c("cf","lci","uci") # new names of data
cdt3$name <- "2005 Election" # model name

(coef4 <- coef(m09)) # coefficient 
(ci4 <- confint(m09, level=0.95)) # 95% confidence interval
cdt4 <- as.data.frame(cbind(coef4, ci4)) # make it a data
colnames(cdt4) <- c("cf","lci","uci") # new names of data
cdt4$name <- "2009 Election" # model name

(coef5 <- coef(m12)) # coefficient 
(ci5 <- confint(m12, level=0.95)) # 95% confidence interval
cdt5 <- as.data.frame(cbind(coef5, ci5)) # make it a data
colnames(cdt5) <- c("cf","lci","uci") # new names of data
cdt5$name <- "2012 Election" # model name

#'
#' Extract, Combine, and Set Variable Names
#' 

# Candidate Quality

cdt_cq<-rbind(cdt1[2,],cdt2[2,],cdt3[2,],cdt4[2,],cdt5[2,])
cdt_cq$name<-rep("Candidate Quality",5)
cdt_cq

# LDP

cdt_ldp<-rbind(cdt1[3,],cdt2[3,],cdt3[3,],cdt4[3,],cdt5[3,])
cdt_ldp$name<-rep("LDP Affiliation",5)
cdt_ldp

cdt_cq$vn <- c("2000","2003","2005","2009","2012")
            
cdt_ldp$vn <- c("2000","2003","2005","2009","2012")


#'
#' Assign Order to Variable Names
#' 
levelset <- c("2000","2003","2005","2009","2012")

cdt_cq$vn <- factor(cdt_cq$vn, levels = rev(levelset))
cdt_ldp$vn <- factor(cdt_ldp$vn, levels = rev(levelset))


## Drop Intercept
cdt1x <- cdt_cq[!cdt_cq$vn %in% c("(Intercept)"),]

ggplot(cdt1x, aes(x=vn)) + 
  # data is cdt1x, y axis is variable name = vn (flip later)
  geom_point(aes(y=cf),size=3) + 
  # plot point estimate = cf
  # size to control point size
  geom_errorbar(aes(ymin=lci,ymax=uci),width=0.3, size = 1) + 
  # plot confidence interval (lower bound is lci, upper bound is uci)
  # size to control line width
  # width to control th height of vertical lines at the edges
  geom_hline(aes(yintercept=0), linetype=2, size=0.5) + 
  # horizontal line at 0
  # linetype to control form of line (2 is dashed)
  # size to control line width
  xlab(NULL) + 
  # no grand label for variables
  ylab("Coefficient with 95% Confidence Interval") + 
  # Label for x axis (for coefficient value)
  ggtitle("Candidate Quality on Probability of Winning for Newcomers") + 
  # Title (if not needed, use NULL)
  coord_flip() + 
  # Flip Plot 
  theme_bw() + 
  theme(plot.title = element_text(size=16, face="bold", hjust=0.5),
        # plot title setting (ggtitle argument)
        axis.title.x = element_text(size=13, face="plain", hjust=0.5),
        # x axis title setting 
        axis.text.y = element_text(size=13, face="bold", color="black", hjust=1),
        # y axis labels (variables)
        axis.text.x = element_text(size=13, face="plain", color="black",hjust=0.5)
        # x axis labels (coefficient values)
  )

###

cdt2x <- cdt_ldp[!cdt_ldp$vn %in% c("(Intercept)"),]

ggplot(cdt2x, aes(x=vn)) + 
  # data is cdt2x, y axis is variable name = vn (flip later)
  geom_point(aes(y=cf),size=2) + 
  # plot point estimate = cf
  # size to control point size
  geom_errorbar(aes(ymin=lci,ymax=uci),width=0.3, size = 0.5) + 
  # plot confidence interval (lower bound is lci, upper bound is uci)
  # size to control line width
  # width to control th height of vertical lines at the edges
  geom_hline(aes(yintercept=0), linetype=2, size=0.5) + 
  # horizontal line at 0
  # linetype to control form of line (2 is dashed)
  # size to control line width
  xlab(NULL) + 
  # no grand label for variables
  ylab("Coefficient with 95% Confidence Interval") + 
  # Label for x axis (for coefficient value)
  ggtitle("LDP Affiliation on Probability of Winning for Newcomers") + 
  # Title (if not needed, use NULL)
  coord_flip() + 
  # Flip Plot 
  theme_bw() + 
  theme(plot.title = element_text(size=13, face="bold", hjust=0.5),
        # plot title setting (ggtitle argument)
        axis.title.x = element_text(size=11, face="plain", hjust=0.5),
        # x axis title setting 
        axis.text.y = element_text(size=11, face="plain", color="black", hjust=1),
        # y axis labels (variables)
        axis.text.x = element_text(size=11, face="plain", color="black",hjust=0.5)
        # x axis labels (coefficient values)
  )




################
## Plot Side by Side
################

cdt <- rbind(cdt_cq, cdt_ldp)
cdt
# Drop intercept from the output 
# (depending on your preference, you can drop ANY variables by its "vn" value)
# (If You don't want to drop any variables, delete this line)
cdtx <- cdt[!cdt$vn %in% c("(Intercept)"),]

ggplot(cdtx, aes(x=vn)) + 
  # data is cdtx, y axis is variable name = vn (flip later)
  geom_point(aes(y=cf),size=2) + 
  # plot point estimate = cf
  # size to control point size
  geom_errorbar(aes(ymin=lci,ymax=uci),width=0.3, size = 0.5) + 
  # plot confidence interval (lower bound is lci, upper bound is uci)
  # size to control line width
  # width to control th height of vertical lines at the edges
  geom_hline(aes(yintercept=0), linetype=2, size=0.5) + 
  # horizontal line at 0
  # linetype to control form of line (2 is dashed)
  # size to control line width
  facet_grid(. ~ name) +
  # facetting by the model name (name is the model variable created in the data)
  xlab(NULL) + 
  # no grand label for variables
  ylab("Coefficient with 95% Confidence Interval") + 
  # Label for x axis (for coefficient value)
  ggtitle("Candidate Quality and LDP Affiliation on Probability of Winning") + 
  # Title (if not needed, use NULL)
  coord_flip() + 
  # Flip Plot 
  theme_bw() + 
  theme(plot.title = element_text(size=13, face="bold", hjust=0.5),
        # plot title setting (ggtitle argument)
        axis.title.x = element_text(size=11, face="plain", hjust=0.5),
        # x axis title setting 
        axis.text.y = element_text(size=11, face="plain", color="black", hjust=1),
        # y axis labels (variables)
        axis.text.x = element_text(size=11, face="plain", color="black",hjust=0.5),
        # x axis labels (coefficient values)
        strip.text = element_text(size=11, face="bold", color="black", hjust=0.5)
        # facet strip texts
  )

#'
#' ### Draw Plot (Two Models in the same plot, with different linetype)
#' 
#' Optimized for Paper purposes

## use the same data (i.e., cdtx) as the previous plot.

ggplot(cdtx, aes(x=vn)) + 
  # data is cdtx, y axis is variable name = vn (flip later)
  geom_point(aes(y=cf,shape=name), size=2, 
             position=position_dodge(width=-0.5)) + 
  # plot point estimate = cf
  # point shape is differentiated by "name" == model name
  # size to control point size
  # position_dodge width to control space between two points in the same row.
  geom_errorbar(aes(ymin=lci,ymax=uci,linetype=name),width=0.3, size = 0.5,
                position=position_dodge(width=-0.5)) + 
  # plot confidence interval (lower bound is lci, upper bound is uci)
  # linetype is differentiated by "name" == model name
  # size to control line width
  # width to control th height of vertical lines at the edges
  # position_dodge width to control space between two lines in the same row.
  geom_hline(aes(yintercept=0), linetype=2, size=0.5) + 
  # horizontal line at 0
  # linetype to control form of line (2 is dashed)
  # size to control line width
  scale_shape_discrete(name="") + 
  # Legend Title for Point Shape
  scale_linetype_discrete(name="") + 
  # Legend Title for Line Type
  xlab("Election Year") + 
  # no grand label for variables
  ylab("Odds of Winning a Seat") + 
  # Label for x axis (for coefficient value)
  ggtitle("Newcomer Candidate Quality and LDP Affiliation on Winning a Seat") + 
  # Title (if not needed, use NULL)
  coord_flip() + 
  # Flip Plot 
  theme_bw() + 
  theme(plot.title = element_text(size=13, face="bold", hjust=0.5),
        # plot title setting (ggtitle argument)
        axis.title.x = element_text(size=11, face="plain", hjust=0.5),
        # x axis title setting 
        axis.text.y = element_text(size=11, face="plain", color="black", hjust=1),
        # y axis labels (variables)
        axis.text.x = element_text(size=11, face="plain", color="black",hjust=0.5),
        # x axis labels (coefficient values)
        strip.text = element_text(size=11, face="bold", color="black", hjust=0.5),
        # facet strip texts
        legend.title = element_text(size=11, face="plain", color="black",hjust=0.5),
        # legend title text
        legend.text = element_text(size=11, face="plain", color="black",hjust=0.5),
        # legend label text
        legend.position = "top"
        # legend position. You can use "top","bottom","right","left"/like c(0.1,0.1)
  )


###########################################################################################
#######################################################
#############################
###############
##########                  Plots of Predicted Probabilities
###############
#############################
######################################################
############################################################################################

head(jtest)
summary(m00)

nd00 <- with(subset(jtest,year==2000 & newcand2==1), 
                 data.frame(cqual2 = c(0,1,0,1), ldpdum = c(0,0,1,1), coord_fail = mean(coord_fail),
                            exp100 = 96.78, samecand = 1, oppinc = 1, 
                            popdensity = mean(popdensity)))

pd00 <- cbind(nd00,predict(m00, newdata = nd00, type = "link", se = TRUE))

pd00 <- within(pd00, {
  PredictedProb <- plogis(fit)
  LL <- plogis(fit - (1.96 * se.fit))
  UL <- plogis(fit + (1.96 * se.fit))
})        

head(pd00)



ggplot(pd00, aes(x = factor(ldpdum), y = PredictedProb)) + 
  geom_boxplot(aes(fill = factor(cqual2)))

table(jtest$coord_fail[jtest$year==2000 & jtest$newcand2==1])




#########################################################
#########################
####   Plots of Models using Vote Share
#########################


(coef1 <- coef(vm00)) # coefficient 
(ci1 <- confint(vm00, level=0.95)) # 95% confidence interval
cdt1 <- as.data.frame(cbind(coef1, ci1)) # make it a data
colnames(cdt1) <- c("cf","lci","uci") # new names of data
cdt1$name <- "2000 Election" # model name

(coef2 <- coef(vm03)) # coefficient 
(ci2 <- confint(vm03, level=0.95)) # 95% confidence interval
cdt2 <- as.data.frame(cbind(coef2, ci2)) # make it a data
colnames(cdt2) <- c("cf","lci","uci") # new names of data
cdt2$name <- "2003 Election" # model name

(coef3 <- coef(vm05)) # coefficient 
(ci3 <- confint(vm05, level=0.95)) # 95% confidence interval
cdt3 <- as.data.frame(cbind(coef3, ci3)) # make it a data
colnames(cdt3) <- c("cf","lci","uci") # new names of data
cdt3$name <- "2005 Election" # model name

(coef4 <- coef(vm09)) # coefficient 
(ci4 <- confint(vm09, level=0.95)) # 95% confidence interval
cdt4 <- as.data.frame(cbind(coef4, ci4)) # make it a data
colnames(cdt4) <- c("cf","lci","uci") # new names of data
cdt4$name <- "2009 Election" # model name

(coef5 <- coef(vm12)) # coefficient 
(ci5 <- confint(vm12, level=0.95)) # 95% confidence interval
cdt5 <- as.data.frame(cbind(coef5, ci5)) # make it a data
colnames(cdt5) <- c("cf","lci","uci") # new names of data
cdt5$name <- "2012 Election" # model name

#'
#' Extract, Combine, and Set Variable Names
#' 

# Candidate Quality

cdt_cq<-rbind(cdt1[2,],cdt2[2,],cdt3[2,],cdt4[2,],cdt5[2,])
cdt_cq$name<-rep("Candidate Quality",5)
cdt_cq

# LDP

cdt_ldp<-rbind(cdt1[3,],cdt2[3,],cdt3[3,],cdt4[3,],cdt5[3,])
cdt_ldp$name<-rep("LDP Affiliation",5)
cdt_ldp

cdt_cq$vn <- c("2000","2003","2005","2009","2012")

cdt_ldp$vn <- c("2000","2003","2005","2009","2012")


#'
#' Assign Order to Variable Names
#' 
levelset <- c("2000","2003","2005","2009","2012")

cdt_cq$vn <- factor(cdt_cq$vn, levels = rev(levelset))
cdt_ldp$vn <- factor(cdt_ldp$vn, levels = rev(levelset))


## Drop Intercept
cdt1x <- cdt_cq[!cdt_cq$vn %in% c("(Intercept)"),]

ggplot(cdt1x, aes(x=vn)) + 
  # data is cdt1x, y axis is variable name = vn (flip later)
  geom_point(aes(y=cf),size=3) + 
  # plot point estimate = cf
  # size to control point size
  geom_errorbar(aes(ymin=lci,ymax=uci),width=0.3, size = 1) + 
  # plot confidence interval (lower bound is lci, upper bound is uci)
  # size to control line width
  # width to control th height of vertical lines at the edges
  geom_hline(aes(yintercept=0), linetype=2, size=0.5) + 
  # horizontal line at 0
  # linetype to control form of line (2 is dashed)
  # size to control line width
  xlab(NULL) + 
  # no grand label for variables
  ylab("Coefficient with 95% Confidence Interval") + 
  # Label for x axis (for coefficient value)
  ggtitle("Candidate Quality on Probability of Winning for Newcomers") + 
  # Title (if not needed, use NULL)
  coord_flip() + 
  # Flip Plot 
  theme_bw() + 
  theme(plot.title = element_text(size=16, face="bold", hjust=0.5),
        # plot title setting (ggtitle argument)
        axis.title.x = element_text(size=13, face="plain", hjust=0.5),
        # x axis title setting 
        axis.text.y = element_text(size=13, face="bold", color="black", hjust=1),
        # y axis labels (variables)
        axis.text.x = element_text(size=13, face="plain", color="black",hjust=0.5)
        # x axis labels (coefficient values)
  )

###

cdt2x <- cdt_ldp[!cdt_ldp$vn %in% c("(Intercept)"),]

ggplot(cdt2x, aes(x=vn)) + 
  # data is cdt2x, y axis is variable name = vn (flip later)
  geom_point(aes(y=cf),size=2) + 
  # plot point estimate = cf
  # size to control point size
  geom_errorbar(aes(ymin=lci,ymax=uci),width=0.3, size = 0.5) + 
  # plot confidence interval (lower bound is lci, upper bound is uci)
  # size to control line width
  # width to control th height of vertical lines at the edges
  geom_hline(aes(yintercept=0), linetype=2, size=0.5) + 
  # horizontal line at 0
  # linetype to control form of line (2 is dashed)
  # size to control line width
  xlab(NULL) + 
  # no grand label for variables
  ylab("Coefficient with 95% Confidence Interval") + 
  # Label for x axis (for coefficient value)
  ggtitle("LDP Affiliation on Probability of Winning for Newcomers") + 
  # Title (if not needed, use NULL)
  coord_flip() + 
  # Flip Plot 
  theme_bw() + 
  theme(plot.title = element_text(size=13, face="bold", hjust=0.5),
        # plot title setting (ggtitle argument)
        axis.title.x = element_text(size=11, face="plain", hjust=0.5),
        # x axis title setting 
        axis.text.y = element_text(size=11, face="plain", color="black", hjust=1),
        # y axis labels (variables)
        axis.text.x = element_text(size=11, face="plain", color="black",hjust=0.5)
        # x axis labels (coefficient values)
  )

################
## Plot Side by Side
################

cdt <- rbind(cdt_cq, cdt_ldp)
cdt
# Drop intercept from the output 
# (depending on your preference, you can drop ANY variables by its "vn" value)
# (If You don't want to drop any variables, delete this line)
cdtx <- cdt[!cdt$vn %in% c("(Intercept)"),]

ggplot(cdtx, aes(x=vn)) + 
  # data is cdtx, y axis is variable name = vn (flip later)
  geom_point(aes(y=cf),size=2) + 
  # plot point estimate = cf
  # size to control point size
  geom_errorbar(aes(ymin=lci,ymax=uci),width=0.3, size = 0.5) + 
  # plot confidence interval (lower bound is lci, upper bound is uci)
  # size to control line width
  # width to control th height of vertical lines at the edges
  geom_hline(aes(yintercept=0), linetype=2, size=0.5) + 
  # horizontal line at 0
  # linetype to control form of line (2 is dashed)
  # size to control line width
  facet_grid(. ~ name) +
  # facetting by the model name (name is the model variable created in the data)
  xlab(NULL) + 
  # no grand label for variables
  ylab("Coefficient with 95% Confidence Interval") + 
  # Label for x axis (for coefficient value)
  ggtitle("Candidate Quality and LDP Affiliation on Probability of Winning") + 
  # Title (if not needed, use NULL)
  coord_flip() + 
  # Flip Plot 
  theme_bw() + 
  theme(plot.title = element_text(size=13, face="bold", hjust=0.5),
        # plot title setting (ggtitle argument)
        axis.title.x = element_text(size=11, face="plain", hjust=0.5),
        # x axis title setting 
        axis.text.y = element_text(size=11, face="plain", color="black", hjust=1),
        # y axis labels (variables)
        axis.text.x = element_text(size=11, face="plain", color="black",hjust=0.5),
        # x axis labels (coefficient values)
        strip.text = element_text(size=11, face="bold", color="black", hjust=0.5)
        # facet strip texts
  )

#'
#' ### Draw Plot (Two Models in the same plot, with different linetype)
#' 
#' Optimized for Paper purposes

## use the same data (i.e., cdtx) as the previous plot.

ggplot(cdtx, aes(x=vn)) + 
  # data is cdtx, y axis is variable name = vn (flip later)
  geom_point(aes(y=cf,shape=name), size=2, 
             position=position_dodge(width=-0.5)) + 
  # plot point estimate = cf
  # point shape is differentiated by "name" == model name
  # size to control point size
  # position_dodge width to control space between two points in the same row.
  geom_errorbar(aes(ymin=lci,ymax=uci,linetype=name),width=0.3, size = 0.5,
                position=position_dodge(width=-0.5)) + 
  # plot confidence interval (lower bound is lci, upper bound is uci)
  # linetype is differentiated by "name" == model name
  # size to control line width
  # width to control th height of vertical lines at the edges
  # position_dodge width to control space between two lines in the same row.
  geom_hline(aes(yintercept=0), linetype=2, size=0.5) + 
  # horizontal line at 0
  # linetype to control form of line (2 is dashed)
  # size to control line width
  scale_shape_discrete(name="") + 
  # Legend Title for Point Shape
  scale_linetype_discrete(name="") + 
  # Legend Title for Line Type
  xlab(NULL) + 
  # no grand label for variables
  ylab("Coefficient with 95% Confidence Interval") + 
  # Label for x axis (for coefficient value)
  ggtitle("Newcomer Candidate Quality and LDP Affiliation on Voteshare") + 
  # Title (if not needed, use NULL)
  coord_flip() + 
  # Flip Plot 
  theme_bw() + 
  theme(plot.title = element_text(size=13, face="bold", hjust=0.5),
        # plot title setting (ggtitle argument)
        axis.title.x = element_text(size=11, face="plain", hjust=0.5),
        # x axis title setting 
        axis.text.y = element_text(size=11, face="plain", color="black", hjust=1),
        # y axis labels (variables)
        axis.text.x = element_text(size=11, face="plain", color="black",hjust=0.5),
        # x axis labels (coefficient values)
        strip.text = element_text(size=11, face="bold", color="black", hjust=0.5),
        # facet strip texts
        legend.title = element_text(size=11, face="plain", color="black",hjust=0.5),
        # legend title text
        legend.text = element_text(size=11, face="plain", color="black",hjust=0.5),
        # legend label text
        legend.position = "top"
        # legend position. You can use "top","bottom","right","left"/like c(0.1,0.1)
  )


## 2014 problems---not enough observations? The DV doesn't seem to have enough variation
## only THREE new candidates won in 2014

jtest$incdum<-ifelse(jtest$inc==1,1,0)

rep1<-glm(win ~ cqual2 + ldpdum + coord_fail + samecand + incdum +
            oppinc + popdensity, family = binomial(link = "probit"),
          data=subset(jtest,year==2000))
summary(rep1)


table(jtest$cqual2[jtest$year==2014 & jtest$newcand2==1],jtest$win[jtest$year==2014 & jtest$newcand2==1])

table(jrdat$year[jrdat$newcand2==1],jrdat$party_id[jrdat$newcand2==1])
table(jrdat$party_en)


#
#
#
#
#
#
#
#
#
#
#
#
#
#
################################################################
###########################
#################################################################################
###########################
###########
##  Models using All Candidates rather than Newcomers
###########
###########################
##################################################################################
###########################
##################################################################

jhrdat<-read.csv("C:\\Users\\jorda\\Documents\\Data Sets\\Reed-Smith-JHRED-Data.csv")
jhrdat<-subset(jhrdat,year==1996|year==2000|year==2003|year==2005|year==2009|year==2012|year==2014)

jrrep<-subset(jhrdat,ku_voteshare>0)

repd<-read_dta("C:\\Users\\jorda\\Documents\\Data Sets\\2012 Reed & Scheiner Rep Data\\Quality vs. Party Data Set 2.10.11.dta")
repd<-subset(repd,smdvote>0)

#####
## Coordination Failure

jrrep$lfbloc<-ifelse(jrrep$party_id==5|jrrep$party_id==12|jrrep$party_id==16.5|jrrep$party_id==35|jrrep$party_id==36,1,0)
jrrep$rhtbloc<-ifelse(jrrep$party_id==1.5|jrrep$party_id==3|jrrep$party_id==31|jrrep$party_id==33,1,0)

lbloc<-aggregate(jrrep$lfbloc,by=list(jrrep$year,jrrep$kucode),sum)
colnames(lbloc)<-c("year","kucode","lbloc")

rbloc<-aggregate(jrrep$rhtbloc,by=list(jrrep$year,jrrep$kucode),sum)
colnames(rbloc)<-c("year","kucode","rbloc")

jrrep<-merge(jrrep,lbloc,by=c("year","kucode"))
jrrep<-merge(jrrep,rbloc,by=c("year","kucode"))

jrrep$cfail_l<-ifelse(jrrep$party_id==16,jrrep$lbloc-jrrep$rbloc,0)
jrrep$cfail_r<-ifelse(jrrep$party_id==1,jrrep$rbloc-jrrep$lbloc,0)
jrrep$coord_fail<-jrrep$cfail_l+jrrep$cfail_r

#####
##

jrdat<-subset(jrrep,prcode>0)

jrdat$returnee2<-ifelse(jrdat$inc==0 & jrdat$totcwins>1,1,0)
jrdat$newcand2<-ifelse(jrdat$inc==0 & jrdat$returnee2==0,1,0)

#####
## Now I need to recreate the key variables from the model in the paper

## DV - Win/Lose

jrdat$win<-ifelse(jrdat$result==1,1,0)

## Candidate Quality

jrdat$cqual<-ifelse(jrdat$bcrat==1|jrdat$hc==1|jrdat$assy==1|
                      jrdat$gov==1|jrdat$mayor==1,1,0)

jrdat$cqual2<-ifelse(jrdat$cqual==1 & jrdat$newcand2==1,1,0)

## Incumbent Dummies

jrdat$inc1<-ifelse(jrdat$inc==1,1,0)
jrdat$inc2<-ifelse(jrdat$inc==2,1,0)
jrdat$inc3<-ifelse(jrdat$inc==3,1,0)
jrdat$inc4<-ifelse(jrdat$inc==4,1,0)
jrdat$inc5<-ifelse(jrdat$inc==5,1,0)
jrdat$inc6<-ifelse(jrdat$inc==6,1,0)
jrdat$inc7<-ifelse(jrdat$inc==7,1,0)

## General Incumbent Dummy

jrdat$incdum<-ifelse(jrdat$inc1==1|jrdat$inc1==2|jrdat$inc1==3,1,0)

### LDP Dummy

jrdat$ldpdum<-ifelse(jrdat$party_id==1,1,0)

### Expenditure in 100,000s of Yen

jrdat$exp100<-jrdat$exp/100000


## Candidate has run before

jrdat$cand96a<-ifelse(jrdat$year==1996,jrdat$pid,0)
cand96<-aggregate(jrdat$cand96a,by=list(jrdat$kucode,jrdat$party_id),sum)
names(cand96)<-c("kucode","party_id","cand96")

jrdat$cand00a<-ifelse(jrdat$year==2000,jrdat$pid,0)
cand00<-aggregate(jrdat$cand00a,by=list(jrdat$kucode,jrdat$party_id),sum)
names(cand00)<-c("kucode","party_id","cand00")

jrdat$cand03a<-ifelse(jrdat$year==2003,jrdat$pid,0)
cand03<-aggregate(jrdat$cand03a,by=list(jrdat$kucode,jrdat$party_id),sum)
names(cand03)<-c("kucode","party_id","cand03")

jrdat$cand05a<-ifelse(jrdat$year==2005,jrdat$pid,0)
cand05<-aggregate(jrdat$cand05a,by=list(jrdat$kucode,jrdat$party_id),sum)
names(cand05)<-c("kucode","party_id","cand05")

jrdat$cand09a<-ifelse(jrdat$year==2009,jrdat$pid,0)
cand09<-aggregate(jrdat$cand09a,by=list(jrdat$kucode,jrdat$party_id),sum)
names(cand09)<-c("kucode","party_id","cand09")

jrdat$cand12a<-ifelse(jrdat$year==2012,jrdat$pid,0)
cand12<-aggregate(jrdat$cand12a,by=list(jrdat$kucode,jrdat$party_id),sum)
names(cand12)<-c("kucode","party_id","cand12")

jrdat$cand14a<-ifelse(jrdat$year==2014,jrdat$pid,0)
cand14<-aggregate(jrdat$cand14a,by=list(jrdat$kucode,jrdat$party_id),sum)
names(cand14)<-c("kucode","party_id","cand14")

jrdat<-merge(jrdat,cand96,by=c("kucode","party_id"))
jrdat<-merge(jrdat,cand00,by=c("kucode","party_id"))
jrdat<-merge(jrdat,cand03,by=c("kucode","party_id"))
jrdat<-merge(jrdat,cand05,by=c("kucode","party_id"))
jrdat<-merge(jrdat,cand09,by=c("kucode","party_id"))
jrdat<-merge(jrdat,cand12,by=c("kucode","party_id"))
jrdat<-merge(jrdat,cand14,by=c("kucode","party_id"))

jrdat$samecand<-ifelse(jrdat$cand96==jrdat$cand00 & jrdat$year==2000 |
                         jrdat$cand00==jrdat$cand03 & jrdat$year==2003 |
                         jrdat$cand03==jrdat$cand05 & jrdat$year==2005 |
                         jrdat$cand05==jrdat$cand09 & jrdat$year==2009 |
                         jrdat$cand09==jrdat$cand12 & jrdat$year==2012 |
                         jrdat$cand12==jrdat$cand14 & jrdat$year==2014,1,0)

## Opponent is a LDP/DPJ incumbent

jrdat$ldpinc<-ifelse(jrdat$party_id==1 & jrdat$inc==1,1,0)
jrdat$dpjinc<-ifelse(jrdat$party_id==16 & jrdat$inc==1,1,0)

incpty_ldp<-aggregate(jrdat$ldpinc,by=list(jrdat$year,jrdat$kucode),sum)
names(incpty_ldp)<-c("year","kucode","incpty_ldp")
incpty_dpj<-aggregate(jrdat$dpjinc,by=list(jrdat$year,jrdat$kucode),sum)
names(incpty_dpj)<-c("year","kucode","incpty_dpj")

jrdat<-merge(jrdat,incpty_ldp,by=c("year","kucode"))
jrdat<-merge(jrdat,incpty_dpj,by=c("year","kucode"))

jrdat$oppinc<-ifelse(jrdat$incpty_ldp==1 & jrdat$party_id!=16 |
                       jrdat$incpty_dpj==1 & jrdat$party_id==1,1,0)



####
##  Dataset Using Prefectural Assembly rather than city or town
####

cqdat<-read.csv("C:\\Users\\jorda\\Documents\\Data Sets\\Candidate Quality Replication Data.csv")
names(cqdat)[1]<-"year"

qdat<-cqdat[,1:6]
qdat<-qdat[-5]
head(qdat)


jtest<-merge(jrdat,qdat,by=c("year","kucode","party_id","pid"),all = T)
jtest$kengi[is.na(jtest$kengi)]<-0
head(jtest)

jtest$cqual3<-ifelse(jtest$bcrat==1|jtest$hc==1|jtest$kengi==1|
                       jtest$gov==1|jtest$mayor==1,1,0)




##########################

im00<-glm(win ~ inc1 + inc2 + inc3 + cqual3 +
            ldpdum + coord_fail + exp100 + samecand +
            oppinc + popdensity, family = binomial(link = "probit"),
            data=subset(jtest,year==2000 & (party_id==1 | party_id==16)))
summary(im00)

#

im03<-glm(win ~ inc1 + inc2 + inc3 + cqual3 +
            ldpdum + coord_fail + exp100 + samecand +
            oppinc + popdensity, family = binomial(link = "probit"),
            data=subset(jtest,year==2003 & (party_id==1 | party_id==16)))
summary(im03)

#

im05<-glm(win ~ inc1 + inc2 + inc3 + cqual3 +
            ldpdum + coord_fail + exp100 + samecand +
            oppinc + popdensity, family = binomial(link = "probit"),
            data=subset(jtest,year==2005 & (party_id==1 | party_id==16)))
summary(im05)

#

im09<-glm(win ~ inc1 + inc2 + inc3 + cqual3 +
            ldpdum + coord_fail + exp100 + samecand +
            oppinc + popdensity, family = binomial(link = "probit"),
            data=subset(jtest,year==2009 & (party_id==1 | party_id==16)))
summary(im09)

#

im12<-glm(win ~ inc1 + inc2 + inc3 + cqual3 +
            ldpdum + coord_fail + exp100 + samecand +
            oppinc + popdensity, family = binomial(link = "probit"),
            data=subset(jtest,year==2012 & (party_id==1|party_id==16)))
summary(im12)


#

im14<-glm(win ~ inc1 + inc2 + inc3 + cqual3 +
            ldpdum + coord_fail + samecand +
            oppinc + popdensity, family = binomial(link = "probit"),
          data=subset(jtest,year==2014))
summary(im14)

### Take a look at other opposition parties besides DPJ (not communists)
### Think about 

### Models of Vote Share
#################################################

vm00<-lm(ku_voteshare*100 ~ inc1 + inc2 + inc3 + cqual3 + ldpdum + coord_fail + 
           exp100 + samecand + oppinc + popdensity + ku_ncand,
           data=subset(jtest,year==2000 & (party_id==1 | party_id==16)))
summary(vm00)

#

vm03<-lm(ku_voteshare*100 ~ inc1 + inc2 + inc3 + cqual3 + ldpdum + coord_fail + 
            exp100 + samecand + oppinc + popdensity + ku_ncand,
          data=subset(jtest,year==2003 & (party_id==1 | party_id==16)))
summary(vm03)

#

vm05<-lm(ku_voteshare*100 ~ inc1 + inc2 + inc3 + cqual3 + ldpdum + coord_fail + 
            exp100 + samecand + oppinc + popdensity + ku_ncand,
          data=subset(jtest,year==2005 & (party_id==1 | party_id==16)))
summary(vm05)

#

vm09<-lm(ku_voteshare*100 ~ inc1 + inc2 + inc3 + cqual3 + ldpdum + coord_fail + 
            exp100 + samecand + oppinc + popdensity + ku_ncand,
          data=subset(jtest,year==2009 & (party_id==1 | party_id==16)))
summary(vm09)

#

vm12<-lm(ku_voteshare*100 ~ inc1 + inc2 + inc3 + cqual3 + ldpdum + coord_fail + 
            exp100 + samecand + oppinc + popdensity + ku_ncand,
          data=subset(jtest,year==2012 & (party_id==1 | party_id==16 | party_id==35)))
summary(vm12)

#

vm14<-lm(ku_voteshare*100 ~ inc1 + inc2 + inc3 + cqual3 + ldpdum + coord_fail + 
            samecand + oppinc + popdensity + ku_ncand,
          data=subset(jtest,year==2014 & (party_id==1 | party_id==16 | party_id==35)))
summary(vm14)

#######################
### Model with incumbency dummy
##########################################

jtest$incdum<-ifelse(jtest$inc==1 | jtest$inc==2 | jtest$inc==3 ,1,0)

#

incm00<-lm(ku_voteshare*100 ~ incdum + cqual3 + ldpdum + 
             oppinc + ku_ncand + exp100,
           data=subset(jtest,year==2000 & (party_id==1 | party_id==16)))
summary(incm00)

#

incm03<-lm(ku_voteshare*100 ~ incdum + cqual3 + ldpdum + 
             oppinc + ku_ncand,
           data=subset(jtest,year==2003 & (party_id==1 | party_id==16)))
summary(incm03)

#

incm05<-lm(ku_voteshare*100 ~ incdum + cqual3 + ldpdum + 
             oppinc + ku_ncand,
           data=subset(jtest,year==2005 & (party_id==1 | party_id==16)))
summary(incm05)

#

incm09<-lm(ku_voteshare*100 ~ incdum + cqual3 + ldpdum + 
             samecand + oppinc + ku_ncand,
           data=subset(jtest,year==2009 & (party_id==1 | party_id==16)))
summary(incm09)

#

incm12<-lm(ku_voteshare*100 ~ incdum + cqual3 + ldpdum + 
             oppinc + ku_ncand,
           data=subset(jtest,year==2012 & (party_id==1 | party_id==16 | party_id==35)))
summary(incm12)

#

incm14<-lm(ku_voteshare*100 ~ incdum + cqual3 + ldpdum + 
           oppinc + ku_ncand,
         data=subset(jtest,year==2014 & (party_id==1 | party_id==16 | party_id==35)))
summary(incm14)


#####################
######### Models for Probability of Winning

winc00<-glm(win ~ incdum + cqual3 + ldpdum + coord_fail +
             oppinc + popdensity,
             family = binomial(link = "probit"),
             data=subset(jtest,year==2000 & (party_id==1 | party_id==16)))
summary(winc00)

winc00_ols<-lm(win ~ incdum + cqual3 + ldpdum + coord_fail +
              oppinc + popdensity,
              data=subset(jtest,year==2000 & (party_id==1 | party_id==16)))
summary(winc00_ols)


#

winc03<-glm(win ~ incdum + cqual3 + ldpdum + coord_fail +
              oppinc + popdensity,
              family = binomial(link = "probit"),
              data=subset(jtest,year==2003 & (party_id==1 | party_id==16)))
summary(winc03)

winc03_ols<-lm(win ~ incdum + cqual3 + ldpdum + coord_fail +
              oppinc + popdensity,
              data=subset(jtest,year==2003 & (party_id==1 | party_id==16)))
summary(winc03_ols)

#

winc05<-glm(win ~ incdum + cqual3 + ldpdum + coord_fail +
              oppinc + popdensity,
              family = binomial(link = "probit"),
              data=subset(jtest,year==2005 & (party_id==1 | party_id==16)))
summary(winc05)

winc05_ols<-lm(win ~ incdum + cqual3 + ldpdum + coord_fail +
                 oppinc + popdensity,
               data=subset(jtest,year==2005 & (party_id==1 | party_id==16)))
summary(winc05_ols)

#

winc09<-glm(win ~ incdum + cqual3 + ldpdum + coord_fail +
              oppinc + popdensity,
              family = binomial(link = "probit"),
              data=subset(jtest,year==2009 & (party_id==1 | party_id==16)))
summary(winc09)

winc09_ols<-lm(win ~ incdum + cqual3 + ldpdum + coord_fail +
                 oppinc + popdensity,
               data=subset(jtest,year==2009 & (party_id==1 | party_id==16)))
summary(winc09_ols)

#

winc12<-glm(win ~ incdum + cqual3 + ldpdum + coord_fail +
              oppinc + popdensity,
              family = binomial(link = "probit"),
              data=subset(jtest,year==2012 & (party_id==1 | party_id==16 | party_id==35)))
summary(winc12)

winc12_ols<-lm(win ~ incdum + cqual3 + ldpdum + coord_fail +
                 oppinc + popdensity,
               data=subset(jtest,year==2012 & (party_id==1 | party_id==16 | party_id==35)))
summary(winc12_ols)

# Sensitivity

winc12_sens<-lm(win ~ cqual3 + ldpdum,
               data=subset(jtest,year==2012 & (party_id==1 | party_id==16 | party_id==35)))
summary(winc12_sens)

#

winc14<-glm(win ~ incdum + cqual3 + ldpdum + coord_fail +
              oppinc + popdensity,
              family = binomial(link = "probit"),
              data=subset(jtest,year==2014 & (party_id==1 | party_id==16 | party_id==35)))
summary(winc14)

winc14_ols<-lm(win ~ incdum + cqual3 + ldpdum + coord_fail +
                 oppinc + popdensity,
               data=subset(jtest,year==2014 & (party_id==1 | party_id==16 | party_id==35)))
summary(winc14_ols)

# Sensitivity

winc14_sens<-lm(win ~ cqual3 + ldpdum,
               data=subset(jtest,year==2014 & (party_id==1 | party_id==16 | party_id==35)))
summary(winc14_sens)


###################################################################################
#############
############   Use 2017 ATES for Pr(Winning) in 2017
################################
##################################

ates<-read.csv("C:\\Users\\jorda\\Documents\\Data Sets\\Japan House Candidate Interviews\\2017 Candidate Interviews.csv")

head(ates)

atdf<-data.frame(ates$ID,ates$SEX,ates$PREFEC,ates$DISTRICT,ates$PR,ates$PARTY,ates$INCUMB,
                 ates$TERM,ates$RESULT,ates$MINSHIN)

names(atdf)<-c("ID","sex","pref","dist","pr","party","incum","term","result","minshin")
head(atdf)

atdf$dist<-sprintf("%02d", atdf$dist)

atdf$kucode<-as.numeric(paste(atdf$pref, atdf$dist, sep = ""))

#Drop PR

atdf<-subset(atdf,pr==0)

atdf$incdum<-ifelse(atdf$incum==3,1,0)
atdf$cqual3<-ifelse(atdf$incum==2,1,0)
atdf$win<-ifelse(atdf$result==2,1,0)
atdf$ldpdum<-ifelse(atdf$party==1,1,0)

#####
## Coordination Failure

atdf$lfbloc<-ifelse(atdf$party==3||atdf$party==4|atdf$party==5|atdf$party==7|atdf$party==8,1,0)
atdf$rhtbloc<-ifelse(atdf$party==1|atdf$party==2|atdf$party==9,1,0)

lbloc<-aggregate(atdf$lfbloc,by=list(atdf$kucode),sum)
colnames(lbloc)<-c("kucode","lbloc")

rbloc<-aggregate(atdf$rhtbloc,by=list(atdf$kucode),sum)
colnames(rbloc)<-c("kucode","rbloc")

atdf<-merge(atdf,lbloc,by=c("kucode"))
atdf<-merge(atdf,rbloc,by=c("kucode"))

head(atdf)

atdf$cfail_l<-ifelse(atdf$party==7,atdf$lbloc-atdf$rbloc,0)
atdf$cfail_r<-ifelse(atdf$party==1,atdf$rbloc-atdf$lbloc,0)
atdf$coord_fail<-atdf$cfail_l+atdf$cfail_r

table(atdf$coord_fail)

#######
## Opponent is an LDP/DPJ Incumbent

atdf$ldpinc<-ifelse(atdf$party==1 & atdf$incdum==1,1,0)
atdf$kibinc<-ifelse((atdf$party==7 | atdf$party==8) & atdf$incdum==1,1,0)

head(atdf)

incpty_ldp<-aggregate(atdf$ldpinc,by=list(atdf$kucode),sum)
names(incpty_ldp)<-c("kucode","incpty_ldp")
incpty_kib<-aggregate(atdf$kibinc,by=list(atdf$kucode),sum)
names(incpty_kib)<-c("kucode","incpty_kib")

atdf<-merge(atdf,incpty_ldp,by="kucode")
atdf<-merge(atdf,incpty_kib,by="kucode")

atdf$oppinc<-ifelse((atdf$incpty_ldp==1 & atdf$party!=1) |
                      (atdf$incpty_kib==1 & (atdf$party!=7 | atdf$party!=8)),1,0)


###
## Party ID to match JHRED later

atdf$pty<-ifelse(atdf$minshin<50,16,0)

head(atdf)

atdf$party_id<-ifelse(atdf$party==1,1,
                      ifelse(atdf$pty==16,16,
                             ifelse(atdf$party==4,35,0)))




## I went through all the major party candidates that are NOT incumbents and marked which 
## ones were "quality." The next step is to match this with the data.

qdf<-read.csv("C:\\Users\\jorda\\Documents\\Data Sets\\Japan House Candidate Interviews\\2017 Cand Qual.csv")
head(qdf)

qdf2<-data.frame(qdf$ID,qdf$Quality,qdf$Return)
names(qdf2)<-c("ID","Quality","Return")

atdf<-merge(atdf,qdf2,by=("ID"), all=T)
atdf[is.na(atdf)]<-0

head(atdf)

## Now I need to match the quality variable with that from 2014. I'll take all the winners
## from the 2014 election and use their quality in that dataset

## Matching is tricky. I'll use party affiliation and district 

qdf3<-subset(jtest,year==2014 & win==1)
head(qdf3)

qual<-data.frame(table(qdf3$kucode[qdf3$cqual3==1]),qdf3$party_id[qdf3$cqual3==1])
names(qual)<-c("kucode","Quality","party_id")
qual$incdum<-1
head(qual)


incdf<-subset(atdf,incdum==1)
head(incdf)

t<-merge(incdf,qual,by=c("kucode","party_id","incdum"),all=T)
head(t)

t2<-subset(t,Quality.y==0 | Quality.y==1)
head(t2)


t2<-data.frame(t2$kucode,t2$party_id,t2$Quality.y)
names(t2)<-c("kucode","party_id","Quality2")

at<-merge(atdf,t2,by=c("kucode","party_id"),all=T)
head(at)
at[is.na(at)]<-0

at$cqual<-at$Quality+at$Quality2
at$cqual3<-ifelse(at$cqual==1|at$incum==2,1,0)
at$cqual4<-ifelse(at$cqual==1|at$cqual==2|at$cqual==3|at$incum==2,1,0)

### Match population density from 2014 with district info

mdat<-subset(jtest,year==2014)
popmdat<-data.frame(mdat$kucode,mdat$popdensity)
names(popmdat)<-c("kucode","popdensity")

att<-merge(at,popmdat,by=c("kucode"))
att<-att[!duplicated(att), ]

###########
##### Model of candidate Winning a Seat

winc17<-glm(win ~ incdum + cqual4 + ldpdum + coord_fail +
              oppinc + popdensity,
            family = binomial(link = "probit"),
            data=subset(att, party==1 | party==4 | party==7 | party==8))
summary(winc17)

winc17_ols<-lm(win ~ incdum + cqual + ldpdum + coord_fail +
                 oppinc + popdensity,
               data=subset(att, party==1 | party==4 | party==7 | party==8))
summary(winc17_ols)

# Sensitivity

winc17_ols<-lm(win ~ cqual3 + ldpdum,
               data=subset(att, party==1 | party==4 | party==7 | party==8))
summary(winc17_ols)

###########################################################################################
#########################################################
#########################
####   Plots of Models using Vote Share (Incumbency Dummy)
#########################
#########################################################
###########################################################################################

(coef1 <- coef(incm00)) # coefficient 
(ci1 <- confint(incm00, level=0.95)) # 95% confidence interval
cdt1 <- as.data.frame(cbind(coef1, ci1)) # make it a data
colnames(cdt1) <- c("cf","lci","uci") # new names of data
cdt1$name <- "2000 Election" # model name

(coef2 <- coef(incm03)) # coefficient 
(ci2 <- confint(incm03, level=0.95)) # 95% confidence interval
cdt2 <- as.data.frame(cbind(coef2, ci2)) # make it a data
colnames(cdt2) <- c("cf","lci","uci") # new names of data
cdt2$name <- "2003 Election" # model name

(coef3 <- coef(incm05)) # coefficient 
(ci3 <- confint(incm05, level=0.95)) # 95% confidence interval
cdt3 <- as.data.frame(cbind(coef3, ci3)) # make it a data
colnames(cdt3) <- c("cf","lci","uci") # new names of data
cdt3$name <- "2005 Election" # model name

(coef4 <- coef(incm09)) # coefficient 
(ci4 <- confint(incm09, level=0.95)) # 95% confidence interval
cdt4 <- as.data.frame(cbind(coef4, ci4)) # make it a data
colnames(cdt4) <- c("cf","lci","uci") # new names of data
cdt4$name <- "2009 Election" # model name

(coef5 <- coef(incm12)) # coefficient 
(ci5 <- confint(incm12, level=0.95)) # 95% confidence interval
cdt5 <- as.data.frame(cbind(coef5, ci5)) # make it a data
colnames(cdt5) <- c("cf","lci","uci") # new names of data
cdt5$name <- "2012 Election" # model name

(coef6 <- coef(incm14)) # coefficient 
(ci6 <- confint(incm14, level=0.95)) # 95% confidence interval
cdt6 <- as.data.frame(cbind(coef6, ci6)) # make it a data
colnames(cdt6) <- c("cf","lci","uci") # new names of data
cdt6$name <- "2014 Election" # model name

#'
#' Extract, Combine, and Set Variable Names
#' 

# Candidate Quality

head(cdt1[3,])

cdt_cq<-rbind(cdt1[3,],cdt2[3,],cdt3[3,],cdt4[3,],cdt5[3,],cdt6[3,])
cdt_cq$name<-rep("Candidate Quality",6)
cdt_cq

# LDP

cdt_ldp<-rbind(cdt1[4,],cdt2[4,],cdt3[4,],cdt4[4,],cdt5[4,],cdt6[4,])
cdt_ldp$name<-rep("LDP Affiliation",6)
cdt_ldp

cdt_cq$vn <- c("2000","2003","2005","2009","2012","2014")

cdt_ldp$vn <- c("2000","2003","2005","2009","2012","2014")


#'
#' Assign Order to Variable Names
#' 
levelset <- c("2000","2003","2005","2009","2012","2014")

cdt_cq$vn <- factor(cdt_cq$vn, levels = rev(levelset))
cdt_ldp$vn <- factor(cdt_ldp$vn, levels = rev(levelset))


## Drop Intercept
cdt1x <- cdt_cq[!cdt_cq$vn %in% c("(Intercept)"),]

ggplot(cdt1x, aes(x=vn)) + 
  # data is cdt1x, y axis is variable name = vn (flip later)
  geom_point(aes(y=cf),size=3) + 
  # plot point estimate = cf
  # size to control point size
  geom_errorbar(aes(ymin=lci,ymax=uci),width=0.3, size = 1) + 
  # plot confidence interval (lower bound is lci, upper bound is uci)
  # size to control line width
  # width to control th height of vertical lines at the edges
  geom_hline(aes(yintercept=0), linetype=2, size=0.5) + 
  # horizontal line at 0
  # linetype to control form of line (2 is dashed)
  # size to control line width
  xlab(NULL) + 
  # no grand label for variables
  ylab("Coefficient with 95% Confidence Interval") + 
  # Label for x axis (for coefficient value)
  ggtitle("Candidate Quality on Voteshare") + 
  # Title (if not needed, use NULL)
  coord_flip() + 
  # Flip Plot 
  theme_bw() + 
  theme(plot.title = element_text(size=16, face="bold", hjust=0.5),
        # plot title setting (ggtitle argument)
        axis.title.x = element_text(size=13, face="plain", hjust=0.5),
        # x axis title setting 
        axis.text.y = element_text(size=13, face="bold", color="black", hjust=1),
        # y axis labels (variables)
        axis.text.x = element_text(size=13, face="plain", color="black",hjust=0.5)
        # x axis labels (coefficient values)
  )

###

cdt2x <- cdt_ldp[!cdt_ldp$vn %in% c("(Intercept)"),]

ggplot(cdt2x, aes(x=vn)) + 
  # data is cdt2x, y axis is variable name = vn (flip later)
  geom_point(aes(y=cf),size=2) + 
  # plot point estimate = cf
  # size to control point size
  geom_errorbar(aes(ymin=lci,ymax=uci),width=0.3, size = 0.5) + 
  # plot confidence interval (lower bound is lci, upper bound is uci)
  # size to control line width
  # width to control th height of vertical lines at the edges
  geom_hline(aes(yintercept=0), linetype=2, size=0.5) + 
  # horizontal line at 0
  # linetype to control form of line (2 is dashed)
  # size to control line width
  xlab(NULL) + 
  # no grand label for variables
  ylab("Coefficient with 95% Confidence Interval") + 
  # Label for x axis (for coefficient value)
  ggtitle("LDP Affiliation on Voteshare") + 
  # Title (if not needed, use NULL)
  coord_flip() + 
  # Flip Plot 
  theme_bw() + 
  theme(plot.title = element_text(size=13, face="bold", hjust=0.5),
        # plot title setting (ggtitle argument)
        axis.title.x = element_text(size=11, face="plain", hjust=0.5),
        # x axis title setting 
        axis.text.y = element_text(size=11, face="plain", color="black", hjust=1),
        # y axis labels (variables)
        axis.text.x = element_text(size=11, face="plain", color="black",hjust=0.5)
        # x axis labels (coefficient values)
  )

################
## Plot Side by Side
################

cdt <- rbind(cdt_cq, cdt_ldp)
cdt
# Drop intercept from the output 
# (depending on your preference, you can drop ANY variables by its "vn" value)
# (If You don't want to drop any variables, delete this line)
cdtx <- cdt[!cdt$vn %in% c("(Intercept)"),]

ggplot(cdtx, aes(x=vn)) + 
  # data is cdtx, y axis is variable name = vn (flip later)
  geom_point(aes(y=cf),size=2) + 
  # plot point estimate = cf
  # size to control point size
  geom_errorbar(aes(ymin=lci,ymax=uci),width=0.3, size = 0.5) + 
  # plot confidence interval (lower bound is lci, upper bound is uci)
  # size to control line width
  # width to control th height of vertical lines at the edges
  geom_hline(aes(yintercept=0), linetype=2, size=0.5) + 
  # horizontal line at 0
  # linetype to control form of line (2 is dashed)
  # size to control line width
  facet_grid(. ~ name) +
  # facetting by the model name (name is the model variable created in the data)
  xlab(NULL) + 
  # no grand label for variables
  ylab("Coefficient with 95% Confidence Interval") + 
  # Label for x axis (for coefficient value)
  ggtitle("Candidate Quality and LDP Affiliation on Voteshare") + 
  # Title (if not needed, use NULL)
  coord_flip() + 
  # Flip Plot 
  theme_bw() + 
  theme(plot.title = element_text(size=13, face="bold", hjust=0.5),
        # plot title setting (ggtitle argument)
        axis.title.x = element_text(size=11, face="plain", hjust=0.5),
        # x axis title setting 
        axis.text.y = element_text(size=11, face="plain", color="black", hjust=1),
        # y axis labels (variables)
        axis.text.x = element_text(size=11, face="plain", color="black",hjust=0.5),
        # x axis labels (coefficient values)
        strip.text = element_text(size=11, face="bold", color="black", hjust=0.5)
        # facet strip texts
  )

#'
#' ### Draw Plot (Two Models in the same plot, with different linetype)
#' 
#' Optimized for Paper purposes

## use the same data (i.e., cdtx) as the previous plot.

ggplot(cdtx, aes(x=vn)) + 
  # data is cdtx, y axis is variable name = vn (flip later)
  geom_point(aes(y=cf,shape=name), size=2, 
             position=position_dodge(width=-0.5)) + 
  # plot point estimate = cf
  # point shape is differentiated by "name" == model name
  # size to control point size
  # position_dodge width to control space between two points in the same row.
  geom_errorbar(aes(ymin=lci,ymax=uci,linetype=name),width=0.3, size = 0.5,
                position=position_dodge(width=-0.5)) + 
  # plot confidence interval (lower bound is lci, upper bound is uci)
  # linetype is differentiated by "name" == model name
  # size to control line width
  # width to control th height of vertical lines at the edges
  # position_dodge width to control space between two lines in the same row.
  geom_hline(aes(yintercept=0), linetype=2, size=0.5) + 
  # horizontal line at 0
  # linetype to control form of line (2 is dashed)
  # size to control line width
  scale_shape_discrete(name="") + 
  # Legend Title for Point Shape
  scale_linetype_discrete(name="") + 
  # Legend Title for Line Type
  xlab(NULL) + 
  # no grand label for variables
  ylab("Predicted Candidate Vote Share") + 
  # Label for x axis (for coefficient value)
  ggtitle("Candidate Quality and LDP Affiliation on Voteshare") + 
  # Title (if not needed, use NULL)
  coord_flip() + 
  # Flip Plot 
  theme_bw() + 
  theme(plot.title = element_text(size=13, face="bold", hjust=0.5),
        # plot title setting (ggtitle argument)
        axis.title.x = element_text(size=11, face="plain", hjust=0.5),
        # x axis title setting 
        axis.text.y = element_text(size=11, face="plain", color="black", hjust=1),
        # y axis labels (variables)
        axis.text.x = element_text(size=11, face="plain", color="black",hjust=0.5),
        # x axis labels (coefficient values)
        strip.text = element_text(size=11, face="bold", color="black", hjust=0.5),
        # facet strip texts
        legend.title = element_text(size=11, face="plain", color="black",hjust=0.5),
        # legend title text
        legend.text = element_text(size=11, face="plain", color="black",hjust=0.5),
        # legend label text
        legend.position = "top"
        # legend position. You can use "top","bottom","right","left"/like c(0.1,0.1)
  )






###########################################################################################
#########################################################
#########################
####   Plots of Models using Probability of Winning
#########################
#########################################################
###########################################################################################


(coef1 <- coef(winc00)) # coefficient 
(ci1 <- confint(winc00, level=0.95)) # 95% confidence interval
cdt1 <- as.data.frame(cbind(coef1, ci1)) # make it a data
colnames(cdt1) <- c("cf","lci","uci") # new names of data
cdt1$name <- "2000 Election" # model name

(coef2 <- coef(winc03)) # coefficient 
(ci2 <- confint(winc03, level=0.95)) # 95% confidence interval
cdt2 <- as.data.frame(cbind(coef2, ci2)) # make it a data
colnames(cdt2) <- c("cf","lci","uci") # new names of data
cdt2$name <- "2003 Election" # model name

(coef3 <- coef(winc05)) # coefficient 
(ci3 <- confint(winc05, level=0.95)) # 95% confidence interval
cdt3 <- as.data.frame(cbind(coef3, ci3)) # make it a data
colnames(cdt3) <- c("cf","lci","uci") # new names of data
cdt3$name <- "2005 Election" # model name

(coef4 <- coef(winc09)) # coefficient 
(ci4 <- confint(winc09, level=0.95)) # 95% confidence interval
cdt4 <- as.data.frame(cbind(coef4, ci4)) # make it a data
colnames(cdt4) <- c("cf","lci","uci") # new names of data
cdt4$name <- "2009 Election" # model name

(coef5 <- coef(winc12)) # coefficient 
(ci5 <- confint(winc12, level=0.95)) # 95% confidence interval
cdt5 <- as.data.frame(cbind(coef5, ci5)) # make it a data
colnames(cdt5) <- c("cf","lci","uci") # new names of data
cdt5$name <- "2012 Election" # model name

(coef6 <- coef(winc14)) # coefficient 
(ci6 <- confint(winc14, level=0.95)) # 95% confidence interval
cdt6 <- as.data.frame(cbind(coef6, ci6)) # make it a data
colnames(cdt6) <- c("cf","lci","uci") # new names of data
cdt6$name <- "2014 Election" # model name

(coef7 <- coef(winc17)) # coefficient 
(ci7 <- confint(winc17, level=0.95)) # 95% confidence interval
cdt7 <- as.data.frame(cbind(coef7, ci7)) # make it a data
colnames(cdt7) <- c("cf","lci","uci") # new names of data
cdt7$name <- "2017 Election" # model name


#'
#' Extract, Combine, and Set Variable Names
#' 

# Candidate Quality

head(cdt1[4,])

cdt_cq<-rbind(cdt1[3,],cdt2[3,],cdt3[3,],cdt4[3,],cdt5[3,],cdt6[3,],cdt7[3,])
cdt_cq$name<-rep("Candidate Quality",7)
cdt_cq

# LDP

cdt_ldp<-rbind(cdt1[4,],cdt2[4,],cdt3[4,],cdt4[4,],cdt5[4,],cdt6[4,],cdt7[4,])
cdt_ldp$name<-rep("LDP Affiliation",7)
cdt_ldp

cdt_cq$vn <- c("2000","2003","2005","2009","2012","2014","2017")

cdt_ldp$vn <- c("2000","2003","2005","2009","2012","2014","2017")


#'
#' Assign Order to Variable Names
#' 
levelset <- c("2000","2003","2005","2009","2012","2014","2017")

cdt_cq$vn <- factor(cdt_cq$vn, levels = rev(levelset))
cdt_ldp$vn <- factor(cdt_ldp$vn, levels = rev(levelset))


## Drop Intercept
cdt1x <- cdt_cq[!cdt_cq$vn %in% c("(Intercept)"),]

ggplot(cdt1x, aes(x=vn)) + 
  # data is cdt1x, y axis is variable name = vn (flip later)
  geom_point(aes(y=cf),size=3) + 
  # plot point estimate = cf
  # size to control point size
  geom_errorbar(aes(ymin=lci,ymax=uci),width=0.3, size = 1) + 
  # plot confidence interval (lower bound is lci, upper bound is uci)
  # size to control line width
  # width to control th height of vertical lines at the edges
  geom_hline(aes(yintercept=0), linetype=2, size=0.5) + 
  # horizontal line at 0
  # linetype to control form of line (2 is dashed)
  # size to control line width
  xlab(NULL) + 
  # no grand label for variables
  ylab("Coefficient with 95% Confidence Interval") + 
  # Label for x axis (for coefficient value)
  ggtitle("Candidate Quality on Probability of Winning") + 
  # Title (if not needed, use NULL)
  coord_flip() + 
  # Flip Plot 
  theme_bw() + 
  theme(plot.title = element_text(size=16, face="bold", hjust=0.5),
        # plot title setting (ggtitle argument)
        axis.title.x = element_text(size=13, face="plain", hjust=0.5),
        # x axis title setting 
        axis.text.y = element_text(size=13, face="bold", color="black", hjust=1),
        # y axis labels (variables)
        axis.text.x = element_text(size=13, face="plain", color="black",hjust=0.5)
        # x axis labels (coefficient values)
  )

###

cdt2x <- cdt_ldp[!cdt_ldp$vn %in% c("(Intercept)"),]

ggplot(cdt2x, aes(x=vn)) + 
  # data is cdt2x, y axis is variable name = vn (flip later)
  geom_point(aes(y=cf),size=2) + 
  # plot point estimate = cf
  # size to control point size
  geom_errorbar(aes(ymin=lci,ymax=uci),width=0.3, size = 0.5) + 
  # plot confidence interval (lower bound is lci, upper bound is uci)
  # size to control line width
  # width to control th height of vertical lines at the edges
  geom_hline(aes(yintercept=0), linetype=2, size=0.5) + 
  # horizontal line at 0
  # linetype to control form of line (2 is dashed)
  # size to control line width
  xlab(NULL) + 
  # no grand label for variables
  ylab("Coefficient with 95% Confidence Interval") + 
  # Label for x axis (for coefficient value)
  ggtitle("LDP Affiliation on Probability of Winning for Newcomers") + 
  # Title (if not needed, use NULL)
  coord_flip() + 
  # Flip Plot 
  theme_bw() + 
  theme(plot.title = element_text(size=13, face="bold", hjust=0.5),
        # plot title setting (ggtitle argument)
        axis.title.x = element_text(size=11, face="plain", hjust=0.5),
        # x axis title setting 
        axis.text.y = element_text(size=11, face="plain", color="black", hjust=1),
        # y axis labels (variables)
        axis.text.x = element_text(size=11, face="plain", color="black",hjust=0.5)
        # x axis labels (coefficient values)
  )

################
## Plot Side by Side
################

cdt <- rbind(cdt_cq, cdt_ldp)
cdt
# Drop intercept from the output 
# (depending on your preference, you can drop ANY variables by its "vn" value)
# (If You don't want to drop any variables, delete this line)
cdtx <- cdt[!cdt$vn %in% c("(Intercept)"),]

ggplot(cdtx, aes(x=vn)) + 
  # data is cdtx, y axis is variable name = vn (flip later)
  geom_point(aes(y=cf),size=2) + 
  # plot point estimate = cf
  # size to control point size
  geom_errorbar(aes(ymin=lci,ymax=uci),width=0.3, size = 0.5) + 
  # plot confidence interval (lower bound is lci, upper bound is uci)
  # size to control line width
  # width to control th height of vertical lines at the edges
  geom_hline(aes(yintercept=0), linetype=2, size=0.5) + 
  # horizontal line at 0
  # linetype to control form of line (2 is dashed)
  # size to control line width
  facet_grid(. ~ name) +
  # facetting by the model name (name is the model variable created in the data)
  xlab(NULL) + 
  # no grand label for variables
  ylab("Coefficient with 95% Confidence Interval") + 
  # Label for x axis (for coefficient value)
  ggtitle("Candidate Quality and LDP Affiliation on Probability of Winning") + 
  # Title (if not needed, use NULL)
  coord_flip() + 
  # Flip Plot 
  theme_bw() + 
  theme(plot.title = element_text(size=13, face="bold", hjust=0.5),
        # plot title setting (ggtitle argument)
        axis.title.x = element_text(size=11, face="plain", hjust=0.5),
        # x axis title setting 
        axis.text.y = element_text(size=11, face="plain", color="black", hjust=1),
        # y axis labels (variables)
        axis.text.x = element_text(size=11, face="plain", color="black",hjust=0.5),
        # x axis labels (coefficient values)
        strip.text = element_text(size=11, face="bold", color="black", hjust=0.5)
        # facet strip texts
  )

#'
#' ### Draw Plot (Two Models in the same plot, with different linetype)
#' 
#' Optimized for Paper purposes

## use the same data (i.e., cdtx) as the previous plot.

ggplot(cdtx, aes(x=vn)) + 
  # data is cdtx, y axis is variable name = vn (flip later)
  geom_point(aes(y=cf,shape=name), size=2, 
             position=position_dodge(width=-0.5)) + 
  # plot point estimate = cf
  # point shape is differentiated by "name" == model name
  # size to control point size
  # position_dodge width to control space between two points in the same row.
  geom_errorbar(aes(ymin=lci,ymax=uci,linetype=name),width=0.3, size = 0.5,
                position=position_dodge(width=-0.5)) + 
  # plot confidence interval (lower bound is lci, upper bound is uci)
  # linetype is differentiated by "name" == model name
  # size to control line width
  # width to control th height of vertical lines at the edges
  # position_dodge width to control space between two lines in the same row.
  geom_hline(aes(yintercept=0), linetype=2, size=0.5) + 
  # horizontal line at 0
  # linetype to control form of line (2 is dashed)
  # size to control line width
  scale_shape_discrete(name="") + 
  # Legend Title for Point Shape
  scale_linetype_discrete(name="") + 
  # Legend Title for Line Type
  xlab(NULL) + 
  # no grand label for variables
  ylab("Probit Estimate of Candidate Winning") + 
  # Label for x axis (for coefficient value)
  ggtitle("Candidate Quality and LDP Affiliation on Probability of Winning") + 
  # Title (if not needed, use NULL)
  coord_flip() + 
  # Flip Plot 
  theme_bw() + 
  theme(plot.title = element_text(size=13, face="bold", hjust=0.5),
        # plot title setting (ggtitle argument)
        axis.title.x = element_text(size=11, face="plain", hjust=0.5),
        # x axis title setting 
        axis.text.y = element_text(size=11, face="plain", color="black", hjust=1),
        # y axis labels (variables)
        axis.text.x = element_text(size=11, face="plain", color="black",hjust=0.5),
        # x axis labels (coefficient values)
        strip.text = element_text(size=11, face="bold", color="black", hjust=0.5),
        # facet strip texts
        legend.title = element_text(size=11, face="plain", color="black",hjust=0.5),
        # legend title text
        legend.text = element_text(size=11, face="plain", color="black",hjust=0.5),
        # legend label text
        legend.position = "top"
        # legend position. You can use "top","bottom","right","left"/like c(0.1,0.1)
  )









###########################################################################################
#########################################################
#########################
####   Plots of Models using Probability of Winning (OLS)
#########################
#########################################################
###########################################################################################


(coef1 <- coef(winc00_ols)) # coefficient 
(ci1 <- confint(winc00_ols, level=0.95)) # 95% confidence interval
cdt1 <- as.data.frame(cbind(coef1, ci1)) # make it a data
colnames(cdt1) <- c("cf","lci","uci") # new names of data
cdt1$name <- "2000 Election" # model name

(coef2 <- coef(winc03_ols)) # coefficient 
(ci2 <- confint(winc03_ols, level=0.95)) # 95% confidence interval
cdt2 <- as.data.frame(cbind(coef2, ci2)) # make it a data
colnames(cdt2) <- c("cf","lci","uci") # new names of data
cdt2$name <- "2003 Election" # model name

(coef3 <- coef(winc05_ols)) # coefficient 
(ci3 <- confint(winc05_ols, level=0.95)) # 95% confidence interval
cdt3 <- as.data.frame(cbind(coef3, ci3)) # make it a data
colnames(cdt3) <- c("cf","lci","uci") # new names of data
cdt3$name <- "2005 Election" # model name

(coef4 <- coef(winc09_ols)) # coefficient 
(ci4 <- confint(winc09_ols, level=0.95)) # 95% confidence interval
cdt4 <- as.data.frame(cbind(coef4, ci4)) # make it a data
colnames(cdt4) <- c("cf","lci","uci") # new names of data
cdt4$name <- "2009 Election" # model name

(coef5 <- coef(winc12_ols)) # coefficient 
(ci5 <- confint(winc12_ols, level=0.95)) # 95% confidence interval
cdt5 <- as.data.frame(cbind(coef5, ci5)) # make it a data
colnames(cdt5) <- c("cf","lci","uci") # new names of data
cdt5$name <- "2012 Election" # model name

(coef6 <- coef(winc14_ols)) # coefficient 
(ci6 <- confint(winc14_ols, level=0.95)) # 95% confidence interval
cdt6 <- as.data.frame(cbind(coef6, ci6)) # make it a data
colnames(cdt6) <- c("cf","lci","uci") # new names of data
cdt6$name <- "2014 Election" # model name

(coef7 <- coef(winc17_ols)) # coefficient 
(ci7 <- confint(winc17_ols, level=0.95)) # 95% confidence interval
cdt7 <- as.data.frame(cbind(coef7, ci7)) # make it a data
colnames(cdt7) <- c("cf","lci","uci") # new names of data
cdt7$name <- "2017 Election" # model name


#'
#' Extract, Combine, and Set Variable Names
#' 

# Candidate Quality

head(cdt1[4,])

cdt_cq<-rbind(cdt1[3,],cdt2[3,],cdt3[3,],cdt4[3,],cdt5[3,],cdt6[3,],cdt7[3,])
cdt_cq$name<-rep("Candidate Quality",7)
cdt_cq

# LDP

cdt_ldp<-rbind(cdt1[4,],cdt2[4,],cdt3[4,],cdt4[4,],cdt5[4,],cdt6[4,],cdt7[4,])
cdt_ldp$name<-rep("LDP Affiliation",7)
cdt_ldp

cdt_cq$vn <- c("2000","2003","2005","2009","2012","2014","2017")

cdt_ldp$vn <- c("2000","2003","2005","2009","2012","2014","2017")


#'
#' Assign Order to Variable Names
#' 
levelset <- c("2000","2003","2005","2009","2012","2014","2017")

cdt_cq$vn <- factor(cdt_cq$vn, levels = rev(levelset))
cdt_ldp$vn <- factor(cdt_ldp$vn, levels = rev(levelset))


## Drop Intercept
cdt1x <- cdt_cq[!cdt_cq$vn %in% c("(Intercept)"),]

ggplot(cdt1x, aes(x=vn)) + 
  # data is cdt1x, y axis is variable name = vn (flip later)
  geom_point(aes(y=cf),size=3) + 
  # plot point estimate = cf
  # size to control point size
  geom_errorbar(aes(ymin=lci,ymax=uci),width=0.3, size = 1) + 
  # plot confidence interval (lower bound is lci, upper bound is uci)
  # size to control line width
  # width to control th height of vertical lines at the edges
  geom_hline(aes(yintercept=0), linetype=2, size=0.5) + 
  # horizontal line at 0
  # linetype to control form of line (2 is dashed)
  # size to control line width
  xlab(NULL) + 
  # no grand label for variables
  ylab("Coefficient with 95% Confidence Interval") + 
  # Label for x axis (for coefficient value)
  ggtitle("Candidate Quality on Probability of Winning") + 
  # Title (if not needed, use NULL)
  coord_flip() + 
  # Flip Plot 
  theme_bw() + 
  theme(plot.title = element_text(size=16, face="bold", hjust=0.5),
        # plot title setting (ggtitle argument)
        axis.title.x = element_text(size=13, face="plain", hjust=0.5),
        # x axis title setting 
        axis.text.y = element_text(size=13, face="bold", color="black", hjust=1),
        # y axis labels (variables)
        axis.text.x = element_text(size=13, face="plain", color="black",hjust=0.5)
        # x axis labels (coefficient values)
  )

###

cdt2x <- cdt_ldp[!cdt_ldp$vn %in% c("(Intercept)"),]

ggplot(cdt2x, aes(x=vn)) + 
  # data is cdt2x, y axis is variable name = vn (flip later)
  geom_point(aes(y=cf),size=2) + 
  # plot point estimate = cf
  # size to control point size
  geom_errorbar(aes(ymin=lci,ymax=uci),width=0.3, size = 0.5) + 
  # plot confidence interval (lower bound is lci, upper bound is uci)
  # size to control line width
  # width to control th height of vertical lines at the edges
  geom_hline(aes(yintercept=0), linetype=2, size=0.5) + 
  # horizontal line at 0
  # linetype to control form of line (2 is dashed)
  # size to control line width
  xlab(NULL) + 
  # no grand label for variables
  ylab("Coefficient with 95% Confidence Interval") + 
  # Label for x axis (for coefficient value)
  ggtitle("LDP Affiliation on Probability of Winning for Newcomers") + 
  # Title (if not needed, use NULL)
  coord_flip() + 
  # Flip Plot 
  theme_bw() + 
  theme(plot.title = element_text(size=13, face="bold", hjust=0.5),
        # plot title setting (ggtitle argument)
        axis.title.x = element_text(size=11, face="plain", hjust=0.5),
        # x axis title setting 
        axis.text.y = element_text(size=11, face="plain", color="black", hjust=1),
        # y axis labels (variables)
        axis.text.x = element_text(size=11, face="plain", color="black",hjust=0.5)
        # x axis labels (coefficient values)
  )

################
## Plot Side by Side
################

cdt <- rbind(cdt_cq, cdt_ldp)
cdt
# Drop intercept from the output 
# (depending on your preference, you can drop ANY variables by its "vn" value)
# (If You don't want to drop any variables, delete this line)
cdtx <- cdt[!cdt$vn %in% c("(Intercept)"),]

ggplot(cdtx, aes(x=vn)) + 
  # data is cdtx, y axis is variable name = vn (flip later)
  geom_point(aes(y=cf),size=2) + 
  # plot point estimate = cf
  # size to control point size
  geom_errorbar(aes(ymin=lci,ymax=uci),width=0.3, size = 0.5) + 
  # plot confidence interval (lower bound is lci, upper bound is uci)
  # size to control line width
  # width to control th height of vertical lines at the edges
  geom_hline(aes(yintercept=0), linetype=2, size=0.5) + 
  # horizontal line at 0
  # linetype to control form of line (2 is dashed)
  # size to control line width
  facet_grid(. ~ name) +
  # facetting by the model name (name is the model variable created in the data)
  xlab(NULL) + 
  # no grand label for variables
  ylab("Coefficient with 95% Confidence Interval") + 
  # Label for x axis (for coefficient value)
  ggtitle("Candidate Quality and LDP Affiliation on Probability of Winning") + 
  # Title (if not needed, use NULL)
  coord_flip() + 
  # Flip Plot 
  theme_bw() + 
  theme(plot.title = element_text(size=13, face="bold", hjust=0.5),
        # plot title setting (ggtitle argument)
        axis.title.x = element_text(size=11, face="plain", hjust=0.5),
        # x axis title setting 
        axis.text.y = element_text(size=11, face="plain", color="black", hjust=1),
        # y axis labels (variables)
        axis.text.x = element_text(size=11, face="plain", color="black",hjust=0.5),
        # x axis labels (coefficient values)
        strip.text = element_text(size=11, face="bold", color="black", hjust=0.5)
        # facet strip texts
  )

#'
#' ### Draw Plot (Two Models in the same plot, with different linetype)
#' 
#' Optimized for Paper purposes

## use the same data (i.e., cdtx) as the previous plot.

ggplot(cdtx, aes(x=vn)) + 
  # data is cdtx, y axis is variable name = vn (flip later)
  geom_point(aes(y=cf,shape=name), size=2, 
             position=position_dodge(width=-0.5)) + 
  # plot point estimate = cf
  # point shape is differentiated by "name" == model name
  # size to control point size
  # position_dodge width to control space between two points in the same row.
  geom_errorbar(aes(ymin=lci,ymax=uci,linetype=name),width=0.3, size = 0.5,
                position=position_dodge(width=-0.5)) + 
  # plot confidence interval (lower bound is lci, upper bound is uci)
  # linetype is differentiated by "name" == model name
  # size to control line width
  # width to control th height of vertical lines at the edges
  # position_dodge width to control space between two lines in the same row.
  geom_hline(aes(yintercept=0), linetype=2, size=0.5) + 
  # horizontal line at 0
  # linetype to control form of line (2 is dashed)
  # size to control line width
  scale_shape_discrete(name="") + 
  # Legend Title for Point Shape
  scale_linetype_discrete(name="") + 
  # Legend Title for Line Type
  xlab("Year") + 
  # no grand label for variables
  ylab("Change in Probability of Candidate Winning") + 
  # Label for x axis (for coefficient value)
  ggtitle("Candidate Quality and LDP Affiliation on Probability of Winning") + 
  # Title (if not needed, use NULL)
  coord_flip() + 
  # Flip Plot 
  theme_bw() + 
  theme(plot.title = element_text(size=15, face="bold", hjust=0.5),
        # plot title setting (ggtitle argument)
        axis.title.x = element_text(size=15, face="plain", hjust=0.5),
        # x axis title setting
        axis.title.y = element_text(size=15, face="plain", hjust=0.5),
        # y axis title setting
        axis.text.y = element_text(size=15, face="plain", color="black", hjust=1),
        # y axis labels (variables)
        axis.text.x = element_text(size=15, face="plain", color="black",hjust=0.5),
        # x axis labels (coefficient values)
        strip.text = element_text(size=15, face="bold", color="black", hjust=0.5),
        # facet strip texts
        legend.title = element_text(size=15, face="plain", color="black",hjust=0.5),
        # legend title text
        legend.text = element_text(size=15, face="plain", color="black",hjust=0.5),
        # legend label text
        legend.position = "top"
        # legend position. You can use "top","bottom","right","left"/like c(0.1,0.1)
  )

ggsave("canqualvpty.png",units="in", width=10, height=7,dpi=600)

#################################################################################
############################################################
#################################################################################

##### Marginal Effects

library(erer)

##

winc00<-glm(win ~ incdum + cqual3 + ldpdum + coord_fail +
              oppinc,
            family = binomial(link = "probit"), x="TRUE",
            data=subset(jtest,year==2000 & (party_id==1 | party_id==16)))
summary(winc00)

#

winc03<-glm(win ~ incdum + cqual3 + ldpdum + coord_fail +
              oppinc,
            family = binomial(link = "probit"), x="TRUE",
            data=subset(jtest,year==2003 & (party_id==1 | party_id==16)))
summary(winc03)

#

winc05<-glm(win ~ incdum + cqual3 + ldpdum + coord_fail +
              oppinc,
            family = binomial(link = "probit"), x="TRUE",
            data=subset(jtest,year==2005 & (party_id==1 | party_id==16)))
summary(winc05)

#

winc09<-glm(win ~ incdum + cqual3 + ldpdum + coord_fail +
              oppinc,
            family = binomial(link = "probit"), x="TRUE",
            data=subset(jtest,year==2009 & (party_id==1 | party_id==16)))
summary(winc09)

#

winc12<-glm(win ~ incdum + cqual3 + ldpdum + coord_fail +
              oppinc,
            family = binomial(link = "probit"), x="TRUE",
            data=subset(jtest,year==2012 & (party_id==1 | party_id==16 | party_id==35)))
summary(winc12)

#

winc14<-glm(win ~ incdum + cqual3 + ldpdum + coord_fail +
              oppinc,
            family = binomial(link = "probit"), x="TRUE",
            data=subset(jtest,year==2014 & (party_id==1 | party_id==16 | party_id==35)))
summary(winc14)

winc17<-glm(win ~ incdum + cqual4 + ldpdum + coord_fail +
              oppinc,
            family = binomial(link = "probit"), x="TRUE",
            data=subset(at, party==1 | party==4 | party==7 | party==8))
summary(winc17)

maBina(winc00,digits = 3)
maBina(winc03,digits = 3)
maBina(winc05,digits = 3)
maBina(winc09,digits = 3)
maBina(winc12,digits = 3)
maBina(winc14,digits = 3)
maBina(winc17,digits = 3)

mfxboot <- function(modform,dist,data,boot=1000,digits=3){
  x <- glm(modform, family=binomial(link=dist),data)
  # get marginal effects
  pdf <- ifelse(dist=="probit",
                mean(dnorm(predict(x, type = "link"))),
                mean(dlogis(predict(x, type = "link"))))
  marginal.effects <- pdf*coef(x)
  # start bootstrap
  bootvals <- matrix(rep(NA,boot*length(coef(x))), nrow=boot)
  set.seed(1111)
  for(i in 1:boot){
    samp1 <- data[sample(1:dim(data)[1],replace=T,dim(data)[1]),]
    x1 <- glm(modform, family=binomial(link=dist),samp1)
    pdf1 <- ifelse(dist=="probit",
                   mean(dnorm(predict(x, type = "link"))),
                   mean(dlogis(predict(x, type = "link"))))
    bootvals[i,] <- pdf1*coef(x1)
  }
  res <- cbind(marginal.effects,apply(bootvals,2,sd),marginal.effects/apply(bootvals,2,sd))
  if(names(x$coefficients[1])=="(Intercept)"){
    res1 <- res[2:nrow(res),]
    res2 <- matrix(as.numeric(sprintf(paste("%.",paste(digits,"f",sep=""),sep=""),res1)),nrow=dim(res1)[1])     
    rownames(res2) <- rownames(res1)
  } else {
    res2 <- matrix(as.numeric(sprintf(paste("%.",paste(digits,"f",sep=""),sep="")),nrow=dim(res)[1]))
    rownames(res2) <- rownames(res)
  }
  colnames(res2) <- c("marginal.effect","standard.error","z.ratio")  
  return(res2)
}


library(AER)

dat00<-subset(jtest,year==2000 & (party_id==1 | party_id==16))
dat17<-subset(at, party==1 | party==4 | party==7 | party==8)

mfx1 <- mfxboot(win ~ incdum + cqual4 + ldpdum + coord_fail + oppinc,"probit",dat17)
mfx2 <- mfxboot(win ~ incdum + cqual4 + ldpdum + coord_fail + oppinc,"logit",dat17)
mfx3 <- mfxboot(win ~ incdum + cqual4 + ldpdum + coord_fail + oppinc,"probit",dat17,boot=100,digits=4)
mfxdat <- data.frame(cbind(rownames(mfx1),mfx1))
mfxdat$me <- as.numeric(as.character(mfxdat$marginal.effect))
mfxdat$se <- as.numeric(as.character(mfxdat$standard.error))
# coefplot

ggplot(mfxdat, aes(V1, marginal.effect,ymin = me - 2*se,ymax= me + 2*se)) +
  scale_x_discrete('Variable') +
  scale_y_continuous('Marginal Effect',limits=c(-0.5,1)) +
  theme_bw() + 
  geom_errorbar(aes(x = V1, y = me),size=.3,width=.2) + 
  geom_point(aes(x = V1, y = me)) +
  geom_hline(yintercept=0) + 
  coord_flip() +
  labs(title="Marginal Effects with 95% Confidence Intervals")

#################################################################################
############################################################
#################################################################################

##### What Do Voters Care About More?

v09<-read.csv("C:\\Users\\jorda\\Documents\\Data Sets\\Japanese Voter Survey Data\\2009 Post Election Survey\\2009 Post Election Survey.csv")
v12<-read.csv("C:\\Users\\jorda\\Documents\\Data Sets\\Japanese Voter Survey Data\\2012 Post Election Survey\\2012 Post Election Survey.csv")
v14<-read.csv("C:\\Users\\jorda\\Documents\\Data Sets\\Japanese Voter Survey Data\\2014 Post Election Survey\\2014 Post Election Survey.csv")
v17<-read.csv("C:\\Users\\jorda\\Documents\\Data Sets\\Japanese Voter Survey Data\\2017 Post Election Survey\\2017 Post Election Survey.csv")


cvp09<-v09$q7s9
hist(cvp09)

cvp12<-v12$q4s2[v12$q4s2<5]
hist(na.omit(cvp12))

cvp14<-v14$Q10SQ2[v14$Q10SQ2<5]
hist(na.omit(cvp14))

cvp17<-v17$Q10SQ3[v17$Q10SQ3<5]
hist(na.omit(cvp17))



per<-c(.43,.46,.47,.5,
       sum(table(cvp09[cvp09==1]))/sum(table(cvp09)),
       sum(table(cvp12[cvp12==1]))/sum(table(cvp12)),
       sum(table(cvp14[cvp14==1]))/sum(table(cvp14)),
       sum(table(cvp17[cvp17==1]))/sum(table(cvp17)),
       .43,.42,.36,.35,
       sum(table(cvp09[cvp09==2]))/sum(table(cvp09)),
       sum(table(cvp12[cvp12==2]))/sum(table(cvp12)),
       sum(table(cvp14[cvp14==2]))/sum(table(cvp14)),
       sum(table(cvp17[cvp17==2]))/sum(table(cvp17)))

cvp<-c("Party","Party","Party","Party","Party","Party","Party","Party",
       "Candidate","Candidate","Candidate","Candidate","Candidate","Candidate",
       "Candidate","Candidate")
year<-c("1996","2000","2003","2005","2009","2012","2014","2017")

cvpdat<-data.frame(per,cvp,year)

theme_update(text = element_text(size=15))
ggplot(cvpdat,aes(x=year, y=per, fill=cvp)) +
  geom_bar(stat="identity",position = 'dodge') +
  ylim(0,0.8) +
  labs(title="Candidates or Parties? Which Matters Most to Voters?",
       x="Year",
       y="Percentage of Respondents",
       fill = "Which Matters Most?")

ggsave("canorpty.png",units="in", width=10, height=8,dpi=600)
